In [1]:
import torch
In [2]:
torch.backends.cudnn.enabled = False
In [3]:
def initialize():
    # fresh 1 -> 10 -> 10 -> 1 MLP, stored in globals so the helpers below can see it
    global a, b, c
    a = torch.nn.Linear(1, 10)
    b = torch.nn.Linear(10, 10)
    c = torch.nn.Linear(10, 1)

x = torch.tensor([1.])

def forward(x):
    return c(b(a(x).relu()).relu())

def backprop(y):
    y.backward()
    # manual SGD step with lr = 0.01, then reset the gradients
    for layer in [c, b, a]:
        for param in layer.parameters():
            param.data -= .01 * param.grad
            param.grad *= 0

def train():
    y = forward(x)
    backprop(y)
    return y
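
(Aside, not part of the original notebook: backprop() above is just plain gradient descent on the scalar output y with a learning rate of 0.01, so the same step could be written with torch.optim.SGD. A minimal standalone sketch, assuming the same layer shapes:)

import torch

a = torch.nn.Linear(1, 10); b = torch.nn.Linear(10, 10); c = torch.nn.Linear(10, 1)
x = torch.tensor([1.])
# one optimizer over all three layers, same learning rate as the manual loop
opt = torch.optim.SGD(list(a.parameters()) + list(b.parameters()) + list(c.parameters()), lr=0.01)

y = c(b(a(x).relu()).relu())  # same computation as forward(x)
opt.zero_grad()
y.backward()                  # same gradients as backprop(y)
opt.step()                    # param -= 0.01 * grad, like the manual update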
In [4]:
initialize()
print(train())
print(train())
In [5]:
initialize()
for layer in [a, b, c]:
    layer.weight.data = torch.ones_like(layer.weight)
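
(Aside, a hand check that is not in the notebook: with every weight set to one and the biases ignored, the forward value is easy to work out, which makes the prints below easier to read. Each of a's ten units outputs 1*1 = 1, each of b's ten units sums ten ones to 10, and c sums ten tens to 100; the actual prints differ from 100 only because the randomly initialized biases are still in play. A standalone sketch with the biases zeroed:)

import torch

a = torch.nn.Linear(1, 10); b = torch.nn.Linear(10, 10); c = torch.nn.Linear(10, 1)
for layer in [a, b, c]:
    layer.weight.data = torch.ones_like(layer.weight)
    layer.bias.data = torch.zeros_like(layer.bias)  # biases zeroed for the hand calculation
x = torch.tensor([1.])
print(c(b(a(x).relu()).relu()))  # tensor([100.], ...)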
In [6]:
print(c.weight)
In [7]:
train(), print(c.weight)
Out[7]:
In [8]:
train(), print(c.weight)
Out[8]:
In [9]:
train(), print(c.weight)
Out[9]:
In [10]:
b.weight #there's a small amount of symmetry breaking, but the fan-ins are pretty similar
Out[10]:
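
(Aside, an illustrative check rather than part of the notebook, assuming the session state above: each row of b.weight is the fan-in of one hidden unit, so the spread across rows is a direct measure of how much the symmetry has broken.)

rows = b.weight.data                 # shape (10, 10): one fan-in vector per hidden unit
print(rows.std(dim=0))               # spread across the ten fan-ins, per input
print((rows - rows[0]).abs().max())  # largest deviation from the first unit's fan-in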
In [11]:
for _ in range(25):
    train()
b.weight #now the fan-ins are pretty different
Out[11]:
In [12]:
initialize()
for layer in [a, b, c]:
    layer.weight.data = torch.ones_like(layer.weight)
    layer.bias.data = torch.ones_like(layer.bias)
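
(Aside, not from the notebook: once the biases are also set to one, every hidden unit of b computes exactly the same function of x, so they receive identical gradients and gradient descent can never tell them apart; the fan-ins should now stay identical no matter how long we train. A standalone sketch of that claim:)

import torch

a = torch.nn.Linear(1, 10); b = torch.nn.Linear(10, 10); c = torch.nn.Linear(10, 1)
for layer in [a, b, c]:
    layer.weight.data = torch.ones_like(layer.weight)
    layer.bias.data = torch.ones_like(layer.bias)
x = torch.tensor([1.])
y = c(b(a(x).relu()).relu())
y.backward()
g = b.weight.grad
print((g - g[0]).abs().max())  # tensor(0.): every row of the gradient is identical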
In [13]:
print(c.weight, c.bias)
In [14]:
train(), print(c.weight, c.bias)
Out[14]:
In [15]:
train(), print(c.weight, c.bias)
Out[15]:
In [16]:
train(), print(c.weight, c.bias)
Out[16]:
In [17]:
b.weight
Out[17]:
In [18]:
for _ in range(25):
    train()
b.weight
Out[18]: